{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "6bdfbd41",
   "metadata": {},
   "source": [
    "This sample code was written by Tanmoy Debnath and Manvik Nanda; it was edited and reused in different iterations (filling in different list dates, and Wayback Machine links) to assist the collection of the full Social Blade listings of top TikTokers (2020-2023) and YouTubers (2012-2023) as archived in the Internet Archive's Wayback Machine, at these urls: \n",
    "\n",
    "https://web.archive.org/web/20230327030031/http://socialblade.com/youtube/top/100\n",
    "https://web.archive.org/web/20250000000000*/https://socialblade.com/tiktok/top/100"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "97be6a0b",
   "metadata": {},
   "source": [
    "For TikTok"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "874d03d4",
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "import pandas as pd\n",
    "import os\n",
    "\n",
    "# CSV file path\n",
    "csv_file = \"tiktok_data.csv\"\n",
    "\n",
    "# Check if the CSV file exists\n",
    "if os.path.exists(csv_file):\n",
    "    # Read the existing data\n",
    "    df = pd.read_csv(csv_file)\n",
    "else:\n",
    "    # Initialize a new DataFrame if the file does not exist\n",
    "    df = pd.DataFrame(columns=[\"tiktok_name\", \"tiktok_url\", \"date\"])\n",
    "\n",
    "# List of URLs (with Dates for \"Date\" column values)\n",
    "data = [\n",
    "    {\n",
    "        \"date\": \"03-Jul-2020\",\n",
    "        \"url\": \"https://web.archive.org/web/20200703141555/https://socialblade.com/tiktok/top/100/\"\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"17-Jul-2020\",\n",
    "        \"url\": \"https://web.archive.org/web/20200717190952/https://socialblade.com/tiktok/top/100/\",\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"30-Sep-2020\",\n",
    "        \"url\": \"https://web.archive.org/web/20200930163641/https://socialblade.com/tiktok/top/100/\",\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"20-Jan-2021\",\n",
    "        \"url\": \"https://web.archive.org/web/20210120050631/https://socialblade.com/tiktok/top/100/\",\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"17-Mar-2021\",\n",
    "        \"url\": \"https://web.archive.org/web/20210317112425/https://socialblade.com/tiktok/top/100/\"\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"12-Apr-2021\",\n",
    "        \"url\" : \"https://web.archive.org/web/20210412014003/https://socialblade.com/tiktok/top/100/\"\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"06-May-2021\",\n",
    "        \"url\": \"https://web.archive.org/web/20210506163750/https://socialblade.com/tiktok/top/100/\"\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"07-May-2021\",\n",
    "        \"url\": \"https://web.archive.org/web/20210507220142/https://socialblade.com/tiktok/top/100/\",\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"20-Jun-2021\",\n",
    "        \"url\": \"https://web.archive.org/web/20210620112039/https://socialblade.com/tiktok/top/100/\"\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"11-Aug-2021\",\n",
    "        \"url\": \"https://web.archive.org/web/20210811203500/https://socialblade.com/tiktok/top/100/\"\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"12-Aug-2021\",\n",
    "        \"url\": \"https://web.archive.org/web/20210812064047/https://socialblade.com/tiktok/top/100/\"\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"27-Aug-2021\",\n",
    "        \"url\": \"https://web.archive.org/web/20210827054355/https://socialblade.com/tiktok/top/100/\"\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"30-Sep-2021\",\n",
    "        \"url\": \"https://web.archive.org/web/20210930111901/https://socialblade.com/tiktok/top/100/\",\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"09-Oct-2021\",\n",
    "        \"url\": \"https://web.archive.org/web/20211009142925/https://socialblade.com/tiktok/top/100/\"\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"15-Oct-2021\",\n",
    "        \"url\": \"https://web.archive.org/web/20211015115706/https://socialblade.com/tiktok/top/100/\"\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"27-Oct-2021\",\n",
    "        \"url\": \"https://web.archive.org/web/20211027110039/https://socialblade.com/tiktok/top/100/\"\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"09-Nov-2021\",\n",
    "        \"url\": \"https://web.archive.org/web/20211109022951/https://socialblade.com/tiktok/top/100/\"\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"12-Dec-2021\",\n",
    "        \"url\": \"https://web.archive.org/web/20211212001448/https://socialblade.com/tiktok/top/100/\"\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"19-Dec-2021\",\n",
    "        \"url\": \"https://web.archive.org/web/20211219233127/https://socialblade.com/tiktok/top/100/\"\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"08-Feb-2022\",\n",
    "        \"url\": \"https://web.archive.org/web/20220208035323/https://socialblade.com/tiktok/top/100/\",\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"18-Mar-2022\",\n",
    "        \"url\": \"https://web.archive.org/web/20220318214322/https://socialblade.com/tiktok/top/100/\",\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"15-Apr-2022\",\n",
    "        \"url\": \"https://web.archive.org/web/20220415171849/https://socialblade.com/tiktok/top/100/\",\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"03-May-2022\",\n",
    "        \"url\": \"https://web.archive.org/web/20220503163203/https://socialblade.com/tiktok/top/100/\",\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"30-May-2022\",\n",
    "        \"url\": \"https://web.archive.org/web/20220530012644/https://socialblade.com/tiktok/top/100/\",\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"20-Jul-2022\",\n",
    "        \"url\": \"https://web.archive.org/web/20220720123236/https://socialblade.com/tiktok/top/100/\",\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"30-Sep-2022\",\n",
    "        \"url\": \"https://web.archive.org/web/20220930070529/https://socialblade.com/tiktok/top/100/\",\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"10-Nov-2022\",\n",
    "        \"url\": \"https://web.archive.org/web/20221110154008/https://socialblade.com/tiktok/top/100/\",\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"11-April-2023\",\n",
    "        \"url\": \"https://web.archive.org/web/20230411091255/https://socialblade.com/tiktok/top/100/\",\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"13-May-2023\",\n",
    "        \"url\": \"https://web.archive.org/web/20230513081219/https://socialblade.com/tiktok/top/100/\",\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"20-May-2023\",\n",
    "        \"url\": \"https://web.archive.org/web/20230520033808/https://socialblade.com/tiktok/top/100/\",\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"26-May-2023\",\n",
    "        \"url\": \"https://web.archive.org/web/20230526223333/https://socialblade.com/tiktok/top/100/\",\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"10-July-2023\",\n",
    "        \"url\": \"https://web.archive.org/web/20230710121203/https://socialblade.com/tiktok/top/100/\",\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"12-July-2023\",\n",
    "        \"url\": \"https://web.archive.org/web/20230712163357/https://socialblade.com/tiktok/top/100/\",\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"01-Sep-2023\",\n",
    "        \"url\": \"https://web.archive.org/web/20230901192850/https://socialblade.com/tiktok/top/100/\",\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"02-Dec-2023\",\n",
    "        \"url\": \"https://web.archive.org/web/20231202184626/https://socialblade.com/tiktok/top/100/\",\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"13-Dec-2023\",\n",
    "        \"url\": \"https://web.archive.org/web/20231213200422/https://socialblade.com/tiktok/top/100/\",\n",
    "    },\n",
    "    {\n",
    "        \"date\": \"27-Dec-2023\",\n",
    "        \"url\": \"https://web.archive.org/web/20231227041601/https://socialblade.com/tiktok/top/100/\",\n",
    "    },\n",
    "    ]\n",
    "# Looping over the list of URLs\n",
    "for entry in data:\n",
    "    url = entry[\"url\"]\n",
    "    date = entry[\"date\"]\n",
    "\n",
    "    # Make a request to the URL\n",
    "    response = requests.get(url)\n",
    "    soup = BeautifulSoup(response.text, \"html.parser\")\n",
    "\n",
    "    # Extract tiktok_names and tiktok_urls\n",
    "    names = [\n",
    "        name.get_text(strip=True)\n",
    "        for name in soup.select('div[style*=\"float: left; width: 200px;\"] > a')\n",
    "    ]\n",
    "    urls = [\n",
    "        url[\"href\"]\n",
    "        for url in soup.select('div[style*=\"float: left; width: 200px;\"] > a')\n",
    "    ]\n",
    "\n",
    "    # Update DataFrame based on the extracted information\n",
    "    for name, url in zip(names, urls):\n",
    "        # Transform tiktok_url to obtain the direct URL to the user\n",
    "        transformed_url = f\"https://www.tiktok.com/@{url.split('/')[-1]}\"\n",
    "\n",
    "        # Check if the URL already exists in the DataFrame\n",
    "        if not df[df[\"tiktok_url\"] == transformed_url].empty:\n",
    "            # Find the index of the existing entry\n",
    "            idx = df.index[df[\"tiktok_url\"] == transformed_url][0]\n",
    "            # Update the date column for the existing entry\n",
    "            df.at[idx, \"date\"] = \", \".join([df.at[idx, \"date\"], date])\n",
    "        else:\n",
    "            # Add a new entry to the DataFrame\n",
    "            df = pd.concat(\n",
    "                [\n",
    "                    df,\n",
    "                    pd.DataFrame(\n",
    "                        [\n",
    "                            {\n",
    "                                \"tiktok_name\": name,\n",
    "                                \"tiktok_url\": transformed_url,\n",
    "                                \"date\": date,\n",
    "                            }\n",
    "                        ]\n",
    "                    ),\n",
    "                ],\n",
    "                ignore_index=True,\n",
    "            )\n",
    "\n",
    "# Save the updated DataFrame to the CSV file\n",
    "df.to_csv(csv_file, index=False)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
